In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import scipy as sp
In [3]:
# Load state_lakes.csv into a DataFrame.
# The data directory can be overridden with the LAKES_DATA_DIR environment variable so the
# notebook is runnable on other machines; the default is the original local folder.
DATA_DIR = Path(os.environ.get('LAKES_DATA_DIR', r'C:\Users\nisha\Desktop\data'))
state_df = pd.read_csv(DATA_DIR / 'state_lakes.csv')
In [5]:
# Preview the first five rows of the dataset
state_df.head()
Out[5]:
lfenzid lake_name indicator indicator_name measurement value units region
0 0 Lake Rototoa CHLA Chlorophyll-a Median 2.250 mg/m3 Auckland
1 0 Lake Rototoa ECOLI E. coli Median 5.000 cfu/100mL Auckland
2 0 Lake Rototoa NH4N Ammoniacal nitrogen Median 0.005 mg/L Auckland
3 0 Lake Rototoa NH4N_adj Ammoniacal nitrogen Median 0.005 mg/L Auckland
4 0 Lake Rototoa CLAR Clarity Median 3.875 m Auckland
In [7]:
# Summary statistics for the numeric columns
state_df.describe()
Out[7]:
lfenzid value
count 592.000000 592.000000
mean 31293.190878 3.987730
std 16225.990820 13.924649
min 0.000000 0.000220
25% 17761.000000 0.013875
50% 26562.000000 0.475250
75% 48177.000000 3.885225
max 54742.000000 238.000000
In [9]:
# Column dtypes and non-null counts (useful for spotting columns with missing values)
state_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592 entries, 0 to 591
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   lfenzid         592 non-null    int64  
 1   lake_name       592 non-null    object 
 2   indicator       592 non-null    object 
 3   indicator_name  592 non-null    object 
 4   measurement     592 non-null    object 
 5   value           592 non-null    float64
 6   units           509 non-null    object 
 7   region          556 non-null    object 
dtypes: float64(1), int64(1), object(6)
memory usage: 37.1+ KB
In [11]:
import matplotlib.pyplot as plt
import seaborn as sns 
import os 
import warnings
In [13]:
# Number of dataset rows recorded for each indicator, grouped by region
fig, ax = plt.subplots(figsize=(14, 8))
sns.countplot(data=state_df, y='region', hue='indicator_name', ax=ax)
ax.set_title('Indicator Name per Region')
ax.set_xlabel('Count')
ax.set_ylabel('Region')
fig.tight_layout()
plt.show()
No description has been provided for this image
In [17]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def add_real_coordinates(df):
    """
    Attach ``latitude`` and ``longitude`` columns to ``df`` by matching ``lake_name``.

    Lakes present in the built-in lookup table receive the coordinates stored
    there. Any lake name that is not in the table is reported with a printed
    warning and given coordinates from ``simulate_coordinates_for_missing``.

    Returns the left-merged DataFrame; ``df`` itself is not modified.
    """
    # Lookup table: lake name -> (latitude, longitude)
    known_positions = {
        'Wainono Lagoon': (-44.7015, 171.1419),
        'Lake Forsyth (Lake Wairewa)': (-43.8088, 172.7176),
        'Keland Pond': (-43.4545, 172.1892),
        'Lake Ellesmere (Te Waihora)': (-43.7998, 172.4541),
        'Coopers Lagoon': (-43.9522, 172.2157),
        'Lake Rotorua (South)': (-43.9800, 171.0600)
    }

    # Tabular form of the lookup so it can be joined onto the input rows
    coords_table = pd.DataFrame(
        [(name, lat, lon) for name, (lat, lon) in known_positions.items()],
        columns=['lake_name', 'latitude', 'longitude'],
    )

    # Left join keeps every input row, matched or not
    merged_df = df.merge(coords_table, on='lake_name', how='left')

    # A missing latitude after the join means the lake was not in the lookup
    unmatched = merged_df.loc[merged_df['latitude'].isna(), 'lake_name'].unique()
    if len(unmatched) == 0:
        return merged_df

    print(f"Warning: Missing coordinates for lakes: {unmatched}")

    # Fall back to simulated positions for the unmatched lakes only
    fallback = simulate_coordinates_for_missing(df[df['lake_name'].isin(unmatched)])
    for name, (lat, lon) in fallback.items():
        rows = merged_df['lake_name'] == name
        merged_df.loc[rows, 'latitude'] = lat
        merged_df.loc[rows, 'longitude'] = lon

    return merged_df

def simulate_coordinates_for_missing(df, seed=42):
    """
    Generate placeholder coordinates for every distinct ``lake_name`` in ``df``.

    Each lake gets a normally distributed offset from a fixed centre point
    (std 0.5 degrees of latitude, 0.75 degrees of longitude), clamped to a
    fixed bounding box.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``lake_name`` column.
    seed : int, optional
        Seed for the private random generator. The default (42) reproduces
        the values previously obtained via ``np.random.seed(42)``.

    Returns
    -------
    dict
        Maps each lake name to a ``(latitude, longitude)`` tuple of floats.
    """
    # Centre point the simulated positions are scattered around
    center_lat = -43.5
    center_lon = 171.75

    # Use a private generator instead of np.random.seed(): it yields the same
    # stream for the same seed but does not reset the global NumPy RNG that
    # other cells in the notebook may rely on.
    rng = np.random.RandomState(seed)

    coordinates = {}
    for lake in df['lake_name'].unique():
        # Latitude is drawn before longitude; the draw order fixes the output
        lat = center_lat + rng.normal(0, 0.5)
        lon = center_lon + rng.normal(0, 0.75)

        # Clamp to the bounding box [-44.5, -42.5] x [170.0, 173.5]
        lat = max(min(lat, -42.5), -44.5)
        lon = max(min(lon, 173.5), 170.0)

        coordinates[lake] = (lat, lon)

    return coordinates

# Function to prepare data for the bubble map
def prepare_map_data(df):
    """
    Build the per-lake table used by the bubble maps.

    Keeps rows whose ``region`` is 'Canterbury' and whose ``indicator_name``
    is 'Ammoniacal nitrogen' or 'E. coli', attaches coordinates via
    ``add_real_coordinates``, and pivots so each lake is one row with one
    column per indicator holding the mean of ``value``.
    """
    wanted_indicators = ['Ammoniacal nitrogen', 'E. coli']

    # Region and indicator filters applied in one boolean mask
    selection = (df['region'] == 'Canterbury') & df['indicator_name'].isin(wanted_indicators)

    # Coordinates come from the lookup, with simulated fallback for unknown lakes
    map_df = add_real_coordinates(df[selection])

    # One row per lake/position, one column per indicator
    pivot_df = pd.pivot_table(
        map_df,
        index=['lake_name', 'latitude', 'longitude'],
        columns='indicator_name',
        values='value',
        aggfunc='mean',
    ).reset_index()

    # Drop the leftover 'indicator_name' label on the column index
    pivot_df.columns.name = None

    # Guarantee both indicator columns exist even if one had no rows at all
    for indicator in wanted_indicators:
        if indicator not in pivot_df.columns:
            pivot_df[indicator] = np.nan

    return pivot_df

# Function to create an interactive bubble map
def create_bubble_map(data, indicator):
    """
    Build a Plotly Express bubble map for a single indicator.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``lake_name``, ``latitude``, ``longitude`` and a column named
        ``indicator``. The frame is not modified.
    indicator : str
        Column whose values drive both bubble size and colour.

    Returns
    -------
    plotly figure or None
        ``None`` (after printing a message) when the indicator has no values
        or all values are equal, since sizes cannot be normalised then.
    """
    # Rows without a value for this indicator would yield NaN marker sizes,
    # which Plotly rejects; dropping them also gives us a fresh frame so the
    # caller's DataFrame never gains the helper column below.
    plot_df = data.dropna(subset=[indicator])

    min_val = plot_df[indicator].min()
    max_val = plot_df[indicator].max()

    # Skip if there is no data or no spread to normalise over
    if pd.isna(min_val) or pd.isna(max_val) or min_val == max_val:
        print(f"No valid data range for {indicator}")
        return None

    # Min-max normalise the indicator into bubble sizes between 10 and 50
    plot_df = plot_df.assign(
        size=10 + ((plot_df[indicator] - min_val) / (max_val - min_val)) * 40
    )

    # Create the map
    fig = px.scatter_mapbox(
        plot_df,
        lat="latitude",
        lon="longitude",
        size="size",
        color=indicator,
        hover_name="lake_name",
        hover_data={
            # Hide helper/position columns from the tooltip; show the value
            "size": False,
            "latitude": False,
            "longitude": False,
            indicator: ':.3f'
        },
        zoom=7,
        height=700,
        width=1000,
        color_continuous_scale=px.colors.sequential.Blues,
        title=f"Canterbury Lakes: {indicator} Contamination Levels",
        center={"lat": -43.8, "lon": 171.8},
        mapbox_style="open-street-map"
    )

    fig.update_layout(
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        coloraxis_colorbar=dict(
            title=indicator,
            thicknessmode="pixels", thickness=20,
            lenmode="pixels", len=300,
            yanchor="top", y=1,
            ticks="outside"
        )
    )

    return fig

# Function to create tabs with both contaminants
def create_combined_map(state_df):
    """
    Build one figure with two side-by-side maps: Ammoniacal nitrogen (left)
    and E. coli (right).

    Each panel gets its own colour axis and colourbar. The per-indicator
    figures from ``create_bubble_map`` both tag their traces with the same
    default ``coloraxis``; copying the traces unchanged would make the two
    indicators share a single colour range and would drop the colour scale
    configured on the source figures.

    Returns the figure, or ``None`` (after printing a message) when
    ``prepare_map_data`` yields no rows. A panel is left empty if
    ``create_bubble_map`` returns ``None`` for its indicator.
    """
    # Prepare data
    map_data = prepare_map_data(state_df)

    if map_data.empty:
        print("No data available for mapping")
        return None

    # Two mapbox subplots in a single row
    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "mapbox"}, {"type": "mapbox"}]],
        subplot_titles=["Ammoniacal Nitrogen", "E. coli"]
    )

    # (indicator column, subplot column, colour-axis id, colourbar x position)
    # The first colourbar sits in the gap between the panels, the second at
    # the right edge of the figure.
    panels = [
        ("Ammoniacal nitrogen", 1, "coloraxis", 0.45),
        ("E. coli", 2, "coloraxis2", 1.0),
    ]

    coloraxes = {}
    for indicator, col, axis_id, bar_x in panels:
        panel_fig = create_bubble_map(map_data.copy(), indicator)
        if panel_fig is None:
            continue
        for trace in panel_fig.data:
            # Point the trace at this panel's own colour axis
            trace.marker.coloraxis = axis_id
            fig.add_trace(trace, row=1, col=col)
        coloraxes[axis_id] = dict(
            colorscale=px.colors.sequential.Blues,
            colorbar=dict(
                title=indicator,
                x=bar_x, xanchor="left",
                thickness=15, len=0.8,
                ticks="outside"
            )
        )

    # Same map style, zoom and centre for both panels
    map_settings = dict(
        style="open-street-map",
        zoom=7,
        center=dict(lat=-43.8, lon=171.8)
    )
    fig.update_layout(
        height=700,
        width=1800,
        title_text="Canterbury Lakes Contamination Levels",
        title_x=0.5,
        mapbox=map_settings,
        mapbox2=map_settings,
        **coloraxes
    )

    return fig

# Function to create a single map with toggle for contaminants
def create_toggle_map(state_df):
    """
    Build a single map with buttons that switch between Ammoniacal nitrogen
    and E. coli.

    Each indicator's traces are bound to their own colour axis so the two
    indicators never share a colour range, and each button shows only the
    colourbar that belongs to the visible indicator. The Blues colour scale
    is set on the layout here because only traces, not layout, are copied
    from the per-indicator figures.

    Returns the figure, or ``None`` (after printing a message) when there is
    no data or either per-indicator figure could not be created.
    """
    # Prepare data
    map_data = prepare_map_data(state_df)

    if map_data.empty:
        print("No data available for mapping")
        return None

    # (indicator column, button label / title wording, colour-axis id)
    indicators = [
        ("Ammoniacal nitrogen", "Ammoniacal Nitrogen", "coloraxis"),
        ("E. coli", "E. coli", "coloraxis2"),
    ]

    source_figs = [
        create_bubble_map(map_data.copy(), column) for column, _, _ in indicators
    ]
    if any(source_fig is None for source_fig in source_figs):
        print("Could not create one or both maps")
        return None

    trace_counts = [len(source_fig.data) for source_fig in source_figs]

    fig = go.Figure()
    buttons = []
    coloraxes = {}
    for position, ((column, label, axis_id), source_fig) in enumerate(zip(indicators, source_figs)):
        for trace in source_fig.data:
            trace.marker.coloraxis = axis_id
            if position != 0:
                # Only the first indicator is shown initially
                trace.visible = False
            fig.add_trace(trace)

        # One flag per trace in the final figure: True for this indicator's traces
        visibility = [
            owner == position
            for owner, count in enumerate(trace_counts)
            for _ in range(count)
        ]
        # Layout changes applied by the button: title text plus which colourbar shows.
        # "title.text" is used rather than a string-valued "title" so the key
        # matches the nested layout attribute.
        layout_changes = {"title.text": f"Canterbury Lakes: {label} Contamination Levels"}
        for _, _, other_axis in indicators:
            layout_changes[f"{other_axis}.showscale"] = other_axis == axis_id

        buttons.append(dict(
            label=label,
            method="update",
            args=[{"visible": visibility}, layout_changes]
        ))

        coloraxes[axis_id] = dict(
            colorscale=px.colors.sequential.Blues,
            showscale=position == 0,
            colorbar=dict(title=column, ticks="outside")
        )

    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                direction="right",
                active=0,
                x=0.5,
                y=1.15,
                xanchor="center",
                yanchor="top",
                buttons=buttons
            )
        ],
        height=700,
        width=1000,
        title_text="Canterbury Lakes: Ammoniacal Nitrogen Contamination Levels",
        title_x=0.5,
        margin={"r": 0, "t": 100, "l": 0, "b": 0},
        mapbox=dict(
            style="open-street-map",
            zoom=7,
            center=dict(lat=-43.8, lon=171.8)
        ),
        **coloraxes
    )

    return fig


def create_interactive_maps(state_df):
    """
    Build both map variants from ``state_df``.

    Returns a ``(combined_map, toggle_map)`` tuple: the side-by-side figure
    first, then the toggle figure. Either element is ``None`` when the
    corresponding builder returned ``None``.
    """
    # Tuple elements are evaluated left to right: side-by-side map, then toggle map
    return create_combined_map(state_df), create_toggle_map(state_df)
In [19]:
# Generate both map versions
combined_map, toggle_map = create_interactive_maps(state_df)

# Either builder returns None when it has nothing to plot, so only display
# the figures that were actually created instead of failing on None.show().
for map_label, map_fig in (("side-by-side", combined_map), ("toggle", toggle_map)):
    if map_fig is None:
        print(f"Skipping {map_label} map: no figure was created")
    else:
        map_fig.show()
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: